package weka.core.converters;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.io.Reader;
import java.io.StreamTokenizer;
import java.util.ArrayList;
import java.util.Locale;
import weka.core.Attribute;
import weka.core.DenseInstance;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.RevisionUtils;
import weka.core.Utils;
import weka.core.xml.XMLInstances;

/* loaded from: input_file:weka/core/converters/C45Loader.class */
/**
 * Loader for data stored in the C4.5 format.
 *
 * <p>A C4.5 data set is split over two files that share a stem: {@code <stem>.names} declares
 * the class (first line) and the attributes (one per following line), and {@code <stem>.data}
 * holds the rows. The loader accepts the stem itself or the stem with an extension appended;
 * everything after the last {@code '.'} of the file name is treated as the extension.
 *
 * <p>Instances can be obtained either all at once ({@link #getDataSet()}) or one at a time
 * ({@link #getNextInstance(Instances)}); the two modes must not be mixed.
 */
public class C45Loader extends AbstractFileLoader implements BatchConverter, IncrementalConverter {
    static final long serialVersionUID = 5454329403218219L;

    /** Default extension reported by {@link #getFileExtension()}. */
    public static String FILE_EXTENSION = ".names";

    /** Retrieval mode: no instances have been requested since the last (re)set. */
    private static final int RETRIEVAL_NONE = 0;

    /** Retrieval mode set by {@link #getDataSet()}. */
    private static final int RETRIEVAL_BATCH = 1;

    /** Retrieval mode set by {@link #getNextInstance(Instances)}. */
    private static final int RETRIEVAL_INCREMENTAL = 2;

    /** The {@code .data} file that belongs to the current source. */
    private File m_sourceFileData = null;

    /** Reader over the {@code .names} file; only open while the header is being parsed. */
    private transient Reader m_namesReader = null;

    /** Reader over the {@code .data} file. */
    private transient Reader m_dataReader = null;

    /** File name without directory and extension; used as the relation name. */
    private String m_fileStem;

    /** Number of columns per data row, including ignored ones. */
    private int m_numAttribs;

    /** {@code m_ignore[c]} is true when data column {@code c} must be skipped. */
    private boolean[] m_ignore;

    /**
     * Returns a description of this loader.
     *
     * @return the description text
     */
    public String globalInfo() {
        return "Reads a file that is C45 format. Can take a filestem or filestem with .names or .data appended. Assumes that path/<filestem>.names and path/<filestem>.data exist and contain the names and data respectively.";
    }

    /**
     * Forgets the parsed structure and the retrieval mode and, if a file path is remembered,
     * passes it to {@code setFile} again.
     *
     * @throws IOException if {@code setFile} fails
     */
    @Override // weka.core.converters.AbstractFileLoader, weka.core.converters.AbstractLoader, weka.core.converters.Loader
    public void reset() throws IOException {
        this.m_structure = null;
        setRetrieval(RETRIEVAL_NONE);
        if (this.m_File != null) {
            setFile(new File(this.m_File));
        }
    }

    /**
     * Returns the default file extension.
     *
     * @return the current value of {@link #FILE_EXTENSION}
     */
    @Override // weka.core.converters.FileSourcedConverter
    public String getFileExtension() {
        return FILE_EXTENSION;
    }

    /**
     * Returns all file extensions handled by this loader.
     *
     * @return {@code ".names"} and {@code ".data"}
     */
    @Override // weka.core.converters.FileSourcedConverter
    public String[] getFileExtensions() {
        return new String[]{".names", ".data"};
    }

    /**
     * Returns a short description of the file type.
     *
     * @return the description
     */
    @Override // weka.core.converters.FileSourcedConverter
    public String getFileDescription() {
        return "C4.5 data files";
    }

    /**
     * Selects the data set to load and opens its {@code .names} and {@code .data} files.
     *
     * <p>Readers left open by a previous call are closed first, so calling this method
     * repeatedly (as {@link #getStructure()} does) does not leak file handles.
     *
     * @param file the stem, or the stem with an extension; must not be {@code null}
     * @throws IOException if {@code file} is {@code null} or either of the two files cannot be
     *     opened
     */
    @Override // weka.core.converters.AbstractFileLoader, weka.core.converters.AbstractLoader, weka.core.converters.Loader
    public void setSource(File file) throws IOException {
        this.m_structure = null;
        setRetrieval(RETRIEVAL_NONE);
        if (file == null) {
            throw new IOException("Source file object is null!");
        }

        String fileName = file.getName();
        String parent = file.getParent();
        String directory = parent != null ? parent + File.separator : "";
        int lastDot = fileName.lastIndexOf('.');
        String stem = lastDot < 0 ? fileName : fileName.substring(0, lastDot);
        String namesPath = directory + stem + ".names";
        String dataPath = directory + stem + ".data";

        this.m_fileStem = stem;
        File namesFile = new File(namesPath);
        this.m_sourceFile = namesFile;

        // Release whatever an earlier call opened before replacing the readers.
        closeReaderQuietly(this.m_namesReader);
        closeReaderQuietly(this.m_dataReader);
        this.m_namesReader = null;
        this.m_dataReader = null;

        try {
            this.m_namesReader = new BufferedReader(new FileReader(namesFile));
        } catch (FileNotFoundException e) {
            throw new IOException("File not found : " + namesPath, e);
        }

        this.m_sourceFileData = new File(dataPath);
        try {
            this.m_dataReader = new BufferedReader(new FileReader(this.m_sourceFileData));
        } catch (FileNotFoundException e) {
            // Do not keep the names file open when its companion is missing.
            closeReaderQuietly(this.m_namesReader);
            this.m_namesReader = null;
            throw new IOException("File not found : " + dataPath, e);
        }
        this.m_File = namesFile.getAbsolutePath();
    }

    /**
     * Returns the header of the data set, parsing the {@code .names} file on first use.
     *
     * <p>Parsing re-opens the source through {@link #setSource(File)}, which also resets the
     * retrieval mode. The names reader is closed once the header has been read.
     *
     * @return the structure (attributes and class index) without any instances
     * @throws IOException if no source has been set or the header cannot be read
     */
    @Override // weka.core.converters.AbstractLoader, weka.core.converters.Loader
    public Instances getStructure() throws IOException {
        if (this.m_sourceFile == null) {
            throw new IOException("No source has been specified");
        }
        if (this.m_structure == null) {
            setSource(this.m_sourceFile);
            try {
                StreamTokenizer tokenizer = new StreamTokenizer(this.m_namesReader);
                initTokenizer(tokenizer);
                readHeader(tokenizer);
            } finally {
                // The names file is only needed for the header.
                closeReaderQuietly(this.m_namesReader);
                this.m_namesReader = null;
            }
        }
        return this.m_structure;
    }

    /**
     * Reads every remaining row of the {@code .data} file in one go.
     *
     * <p>The data reader is closed afterwards, whether or not parsing succeeded.
     *
     * @return the structure filled with all instances that were read
     * @throws IOException if no source has been set, if incremental retrieval is already in
     *     progress, or if the data cannot be read
     */
    @Override // weka.core.converters.AbstractLoader, weka.core.converters.Loader
    public Instances getDataSet() throws IOException {
        if (this.m_sourceFile == null) {
            throw new IOException("No source has been specified");
        }
        if (getRetrieval() == RETRIEVAL_INCREMENTAL) {
            throw new IOException("Cannot mix getting Instances in both incremental and batch modes");
        }
        if (this.m_structure == null) {
            getStructure();
        }
        // Set the mode only now: loading the structure goes through setSource(), which
        // resets the retrieval mode and would otherwise erase it.
        setRetrieval(RETRIEVAL_BATCH);

        StreamTokenizer tokenizer = new StreamTokenizer(this.m_dataReader);
        initTokenizer(tokenizer);
        Instances dataSet = new Instances(this.m_structure);
        try {
            Instance row = getInstance(tokenizer);
            while (row != null) {
                dataSet.add(row);
                row = getInstance(tokenizer);
            }
        } finally {
            closeReaderQuietly(this.m_dataReader);
        }
        return dataSet;
    }

    /**
     * Reads the next row of the {@code .data} file.
     *
     * <p>The returned instance is attached to the structure held by this loader. When the end
     * of the file is reached the data reader is closed.
     *
     * @param instances not used by this implementation
     * @return the next instance, or {@code null} at the end of the data file
     * @throws IOException if no source has been set, if batch retrieval has already been used,
     *     or if the data cannot be read
     */
    @Override // weka.core.converters.AbstractLoader, weka.core.converters.Loader
    public Instance getNextInstance(Instances instances) throws IOException {
        if (this.m_sourceFile == null) {
            throw new IOException("No source has been specified");
        }
        if (getRetrieval() == RETRIEVAL_BATCH) {
            throw new IOException("Cannot mix getting Instances in both incremental and batch modes");
        }
        if (this.m_structure == null) {
            getStructure();
        }
        // See getDataSet(): the mode must be set after the structure has been loaded.
        setRetrieval(RETRIEVAL_INCREMENTAL);

        StreamTokenizer tokenizer = new StreamTokenizer(this.m_dataReader);
        initTokenizer(tokenizer);
        Instance next = getInstance(tokenizer);
        if (next != null) {
            next.setDataset(this.m_structure);
        } else {
            closeReaderQuietly(this.m_dataReader);
        }
        return next;
    }

    /**
     * Parses one data row into an instance.
     *
     * @param streamTokenizer tokenizer positioned before the row
     * @return the parsed instance (not attached to a data set), or {@code null} when the end of
     *     the input is reached before a row starts
     * @throws IOException if the row is too short, contains an undeclared nominal value or a
     *     malformed number
     */
    private Instance getInstance(StreamTokenizer streamTokenizer) throws IOException {
        double[] values = new double[this.m_structure.numAttributes()];
        StreamTokenizerUtils.getFirstToken(streamTokenizer);
        if (streamTokenizer.ttype == StreamTokenizer.TT_EOF) {
            return null;
        }

        int attIndex = 0; // position in the structure; lags behind 'column' by the ignored ones
        for (int column = 0; column < this.m_numAttribs; column++) {
            if (column > 0) {
                StreamTokenizerUtils.getToken(streamTokenizer);
            }
            if (this.m_ignore[column]) {
                continue;
            }
            if (streamTokenizer.ttype == '?') {
                values[attIndex++] = Utils.missingValue();
                continue;
            }
            if (streamTokenizer.ttype == StreamTokenizer.TT_EOL
                    || streamTokenizer.ttype == StreamTokenizer.TT_EOF) {
                // No word token here means the row ended early. errms is relied on to abort
                // by throwing, as at the other call sites in this class.
                StreamTokenizerUtils.errms(streamTokenizer,
                        "premature end of line. Expected a value for column " + column);
            }

            String token = streamTokenizer.sval;
            if (column == this.m_numAttribs - 1) {
                // C4.5 rows may be terminated with a period.
                token = removeTrailingPeriod(token);
            }

            Attribute attribute = this.m_structure.attribute(attIndex);
            if (attribute.isNominal()) {
                int valueIndex = attribute.indexOfValue(token);
                if (valueIndex == -1) {
                    StreamTokenizerUtils.errms(streamTokenizer,
                            "nominal value not declared in header :" + token + " column " + column);
                }
                values[attIndex] = valueIndex;
            } else if (attribute.isNumeric()) {
                try {
                    values[attIndex] = Double.parseDouble(token);
                } catch (NumberFormatException e) {
                    StreamTokenizerUtils.errms(streamTokenizer, "number expected");
                }
            } else {
                // readHeader() only ever creates numeric and nominal attributes.
                throw new IllegalStateException(
                        "Unsupported attribute type for attribute '" + attribute.name() + "'");
            }
            attIndex++;
        }
        return new DenseInstance(1.0d, values);
    }

    /**
     * Strips a single trailing {@code '.'} from a token.
     *
     * @param str the token; may be empty but not {@code null}
     * @return {@code str} without its final period, or {@code str} itself if it has none
     */
    private static String removeTrailingPeriod(String str) {
        return str.endsWith(".") ? str.substring(0, str.length() - 1) : str;
    }

    /**
     * Parses the {@code .names} file and builds {@code m_structure}, {@code m_numAttribs} and
     * {@code m_ignore} from it.
     *
     * <p>The first line lists the class values. If it holds exactly one entry and that entry is
     * the name of a declared attribute, that attribute becomes the class; otherwise a nominal
     * attribute named {@code "Class"} with the listed values is appended and used as the class.
     * Each following line declares one attribute whose type starts with {@code ignore}, with
     * {@link XMLInstances#TAG_LABEL}, with {@code continuous}, or is a list of nominal values.
     *
     * @param streamTokenizer tokenizer over the names file
     * @throws IOException if the file ends prematurely or cannot be read
     */
    private void readHeader(StreamTokenizer streamTokenizer) throws IOException {
        ArrayList<Attribute> attributes = new ArrayList<Attribute>();
        ArrayList<Integer> ignoredColumns = new ArrayList<Integer>();

        StreamTokenizerUtils.getFirstToken(streamTokenizer);
        if (streamTokenizer.ttype == StreamTokenizer.TT_EOF) {
            StreamTokenizerUtils.errms(streamTokenizer, "premature end of file");
        }

        // First line: the class values (or the name of the class attribute).
        ArrayList<String> classValues = new ArrayList<String>();
        while (streamTokenizer.ttype != StreamTokenizer.TT_EOL
                && streamTokenizer.ttype != StreamTokenizer.TT_EOF) {
            String value = streamTokenizer.sval.trim();
            if (value.length() > 0) {
                classValues.add(removeTrailingPeriod(value));
            }
            StreamTokenizerUtils.getToken(streamTokenizer);
        }

        // Remaining lines: one attribute declaration each.
        int column = 0;
        while (streamTokenizer.ttype != StreamTokenizer.TT_EOF) {
            StreamTokenizerUtils.getFirstToken(streamTokenizer);
            if (streamTokenizer.ttype == StreamTokenizer.TT_EOF) {
                break;
            }
            String attributeName = streamTokenizer.sval;
            StreamTokenizerUtils.getToken(streamTokenizer);
            if (streamTokenizer.ttype == StreamTokenizer.TT_EOL
                    || streamTokenizer.ttype == StreamTokenizer.TT_EOF) {
                StreamTokenizerUtils.errms(streamTokenizer, "premature end of line. Expected attribute type.");
            }

            // Locale.ROOT keeps keyword matching independent of the default locale.
            String type = streamTokenizer.sval.toLowerCase(Locale.ROOT).trim();
            if (type.startsWith("ignore") || type.startsWith(XMLInstances.TAG_LABEL)) {
                ignoredColumns.add(Integer.valueOf(column));
            } else if (type.startsWith("continuous")) {
                attributes.add(new Attribute(attributeName));
            } else {
                // Anything else is a list of nominal values running to the end of the line.
                ArrayList<String> labels = new ArrayList<String>();
                while (streamTokenizer.ttype != StreamTokenizer.TT_EOL
                        && streamTokenizer.ttype != StreamTokenizer.TT_EOF) {
                    String label = streamTokenizer.sval.trim();
                    if (label.length() > 0) {
                        labels.add(removeTrailingPeriod(label));
                    }
                    StreamTokenizerUtils.getToken(streamTokenizer);
                }
                attributes.add(new Attribute(attributeName, labels));
            }
            column++;
        }

        // A single entry on the first line may name an existing attribute as the class.
        int classIndex = -1;
        if (classValues.size() == 1) {
            String className = classValues.get(0);
            for (int k = 0; k < attributes.size(); k++) {
                if (attributes.get(k).name().equals(className)) {
                    classIndex = k;
                    break;
                }
            }
        }
        if (classIndex < 0) {
            attributes.add(new Attribute("Class", classValues));
            classIndex = attributes.size() - 1;
        }

        this.m_structure = new Instances(this.m_fileStem, attributes, 0);
        // classIndex is always a valid position in 'attributes' at this point.
        this.m_structure.setClassIndex(classIndex);

        this.m_numAttribs = this.m_structure.numAttributes() + ignoredColumns.size();
        this.m_ignore = new boolean[this.m_numAttribs];
        for (Integer ignored : ignoredColumns) {
            this.m_ignore[ignored.intValue()] = true;
        }
    }

    /**
     * Configures a tokenizer for the C4.5 syntax: control characters, commas and colons
     * separate tokens, {@code '|'} starts a comment, single and double quotes delimit quoted
     * strings, and line ends are reported as tokens.
     *
     * @param streamTokenizer the tokenizer to configure
     */
    private void initTokenizer(StreamTokenizer streamTokenizer) {
        streamTokenizer.resetSyntax();
        streamTokenizer.whitespaceChars(0, 31);
        streamTokenizer.wordChars(' ', 255);
        streamTokenizer.whitespaceChars(',', ',');
        streamTokenizer.whitespaceChars(':', ':');
        streamTokenizer.commentChar('|');
        streamTokenizer.whitespaceChars('\t', '\t');
        streamTokenizer.quoteChar('"');
        streamTokenizer.quoteChar('\'');
        streamTokenizer.eolIsSignificant(true);
    }

    /**
     * Closes a reader, ignoring a {@code null} argument and any failure to close.
     *
     * @param reader the reader to close; may be {@code null}
     */
    private static void closeReaderQuietly(Reader reader) {
        if (reader == null) {
            return;
        }
        try {
            reader.close();
        } catch (IOException ignored) {
            // The reader was only read from; a failed close loses no data.
        }
    }

    /**
     * Returns the revision string.
     *
     * @return the revision extracted from the embedded revision keyword
     */
    @Override // weka.core.RevisionHandler
    public String getRevision() {
        return RevisionUtils.extract("$Revision: 9290 $");
    }

    /**
     * Command-line entry point; delegates to {@code runFileLoader}.
     *
     * @param strArr the command-line arguments
     */
    public static void main(String[] strArr) {
        runFileLoader(new C45Loader(), strArr);
    }
}
